import os
import re
import json

# Directory to search
base_directory = ""

# Regex pattern to match the desired log lines.
# The capture is non-greedy, so it ends at the first "}" on the line; a stats
# payload containing nested objects would be truncated and rejected as malformed.
log_line_pattern = re.compile(r"\[.*?\] ---- stats : ({.*?})")

# Only completion-token values strictly greater than this are included in the
# completion-token average. The comparison and the printed messages both use
# this single constant so they cannot drift apart.
COMPLETION_TOKENS_THRESHOLD = 50


def parse_stats(line):
    """Extract the stats dictionary from a single log line.

    Args:
        line: One line of text from a log file.

    Returns:
        The decoded stats dict, or ``None`` when the line does not match
        ``log_line_pattern``.

    Raises:
        json.JSONDecodeError: The line matched but its payload is not valid
            JSON even after single quotes are converted to double quotes.
    """
    match = log_line_pattern.search(line)
    if match is None:
        return None
    # The payload may be written with single quotes; JSON requires double
    # quotes. replace() is a no-op when there is nothing to replace.
    json_str = match.group(1).replace("'", '"')
    return json.loads(json_str)


def collect_token_stats(directory, threshold=COMPLETION_TOKENS_THRESHOLD):
    """Aggregate token counts from every ``LLM*.log`` file under *directory*.

    Args:
        directory: Root directory that is walked recursively.
        threshold: A line's ``completion_tokens`` value is added to the
            completion totals only when it is strictly greater than this.

    Returns:
        A tuple ``(prompt_total, completion_total, matched_lines,
        completion_lines, skipped_lines)`` where ``matched_lines`` is the
        number of successfully parsed stats lines, ``completion_lines`` is how
        many of those exceeded *threshold*, and ``skipped_lines`` is the number
        of matching lines whose payload could not be decoded.
    """
    prompt_total = 0
    completion_total = 0
    matched_lines = 0
    completion_lines = 0
    skipped_lines = 0

    for root, _, files in os.walk(directory):
        for file in files:
            if not (file.startswith("LLM") and file.endswith(".log")):
                continue
            file_path = os.path.join(root, file)
            # errors="replace" keeps a stray undecodable byte from aborting
            # the whole run.
            with open(file_path, 'r', errors="replace") as f:
                for line in f:
                    try:
                        stats = parse_stats(line)
                    except json.JSONDecodeError:
                        # One malformed payload should not discard the totals
                        # gathered so far; count it so it can be reported.
                        skipped_lines += 1
                        continue
                    if stats is None:
                        continue
                    prompt_total += stats.get('prompt_tokens', 0)
                    completion_tokens = stats.get('completion_tokens', 0)
                    if completion_tokens > threshold:
                        completion_total += completion_tokens
                        completion_lines += 1
                    matched_lines += 1

    return (prompt_total, completion_total, matched_lines,
            completion_lines, skipped_lines)


# Walk through the directory and process files
(
    prompt_tokens_total,
    completion_tokens_total,
    line_count,
    completion_tokens_count,  # number of lines with completion tokens > threshold
    skipped_line_count,
) = collect_token_stats(base_directory)

if skipped_line_count > 0:
    print(f"Skipped {skipped_line_count} malformed stats line(s).")

# Calculate averages
if line_count > 0:
    avg_prompt_tokens = prompt_tokens_total / line_count
    print(f"Average prompt tokens: {avg_prompt_tokens}")
else:
    print("No matching log lines found.")

# Calculate completion tokens average only for values above the threshold
if completion_tokens_count > 0:
    avg_completion_tokens = completion_tokens_total / completion_tokens_count
    print(f"Average completion tokens (>{COMPLETION_TOKENS_THRESHOLD} only): {avg_completion_tokens}")
else:
    print(f"No completion tokens greater than {COMPLETION_TOKENS_THRESHOLD} found.")